package com.example.novelspringbootmysql.util;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line scraper that downloads the text of a range of chapters of a web novel.
 *
 * <p>It loads a table-of-contents page, collects the chapter links found by
 * {@link #LINK_SELECTOR} within a fixed index window, then fetches every chapter page and
 * extracts the text of its {@code #content} element.
 */
public class ArticleSpider {

    /**
     * Index (inclusive) of the first chapter link to read from the table of contents.
     *
     * <p>NOTE(review): the previous code initialised its loop counter to 3184 while looping on
     * {@code i < 3184}, so no link was ever collected. 2171 is taken from the progress counter
     * used when downloading chapters and fits the 2200/2700/3100 checkpoints; confirm this is
     * the intended starting chapter.
     */
    private static final int FIRST_LINK_INDEX = 2171;

    /** Index (exclusive) at which link collection stops. */
    private static final int END_LINK_INDEX = 3184;

    /** CSS selector matching the chapter anchors on the table-of-contents page. */
    private static final String LINK_SELECTOR = "body>div>dl>dd>a";

    /**
     * Entry point: scrapes the hard-coded novel and prints every downloaded chapter.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Pass in the table-of-contents address of the novel you want to download.
        List<String> chapters = ArticleSpider.inputUrlGetAUrl("https://www.23hh.com/book/24/24035/");
        for (String chapter : chapters) {
            System.out.println(chapter);
        }
    }

    /**
     * Downloads the text of every chapter linked from a table-of-contents page.
     *
     * <p>Only links whose position is in {@code [FIRST_LINK_INDEX, END_LINK_INDEX)} are used; the
     * upper bound is clamped to the number of links actually present on the page. Any failure
     * while loading the table of contents is printed and the chapters collected so far (possibly
     * none) are still processed.
     *
     * @param url address of the table-of-contents page
     * @return the text of each chapter that could be downloaded, in link order; never {@code null}
     */
    public static List<String> inputUrlGetAUrl(String url) {
        List<String> chapterUrls = new ArrayList<>();
        try {
            Document document = Jsoup.connect(url).get();
            // Run the selector once instead of re-querying the document for every link.
            List<Element> links = document.select(LINK_SELECTOR);
            // Clamp so that a shorter table of contents does not cause an index error.
            int end = Math.min(END_LINK_INDEX, links.size());
            for (int i = FIRST_LINK_INDEX; i < end; i++) {
                // "abs:href" resolves the link against the page's base URI.
                chapterUrls.add(links.get(i).attr("abs:href"));
                if (isProgressCheckpoint(i)) {
                    System.out.println("正在读取链接。。。。");
                }
            }
        } catch (Exception e) {
            // Best-effort tool: report the problem and continue with whatever was collected.
            e.printStackTrace();
        }
        return getValue(chapterUrls);
    }

    /**
     * Fetches each chapter page and extracts its text.
     *
     * <p>A page without a {@code #content} element is reported and skipped. Any exception (for
     * example a network failure) is printed and stops the download; the chapters gathered up to
     * that point are returned.
     *
     * @param urls absolute chapter URLs, in reading order
     * @return the formatted text of each chapter that was downloaded
     */
    private static List<String> getValue(List<String> urls) {
        List<String> contentList = new ArrayList<>();
        // Mirrors the link index so the progress checkpoints line up with link collection.
        int position = FIRST_LINK_INDEX;
        try {
            for (String url : urls) {
                Document document = Jsoup.connect(url).get();
                Element contentElement = document.getElementById("content");
                if (isProgressCheckpoint(position)) {
                    System.out.println("正在根据链接读取文字。。。。");
                }
                position++;
                if (contentElement == null) {
                    // Previously this surfaced as a NullPointerException that aborted every
                    // remaining chapter; skip just this page instead.
                    System.err.println("No element with id \"content\" found at " + url);
                    continue;
                }
                // Literal replacements: break paragraphs on the indent run and after each full stop.
                String content = contentElement.text().replace("    ", "\n").replace("。", "。\n");
                contentList.add(content);
            }
        } catch (Exception e) {
            // Best-effort tool: report the problem and return what has been downloaded so far.
            e.printStackTrace();
        }
        return contentList;
    }

    /** Returns whether a progress message should be printed for the given chapter index. */
    private static boolean isProgressCheckpoint(int index) {
        return index == 2200 || index == 2700 || index == 3100;
    }
}

